Análisis Descriptivo

# librerías necesarias para implementar las funciones
library(readxl)
library(glue)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggmosaic)
library(ggridges)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ data.table::first()   masks dplyr::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ data.table::last()    masks dplyr::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ lubridate::second()   masks data.table::second()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## The following objects are masked from 'package:data.table':
## 
##     first, last
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(xtable)
library(here)
## here() starts at /Users/sofiabocker/Desktop/universidad/UCR/Actuariales/Cuarto año/I Ciclo/Estadística Actuarial I/Proyecto/cod
library(skimr) 
library(kableExtra) 
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(rcompanion)
library(RCurl)
## 
## Attaching package: 'RCurl'
## 
## The following object is masked from 'package:tidyr':
## 
##     complete
# importar base de datos 

# Link de la base de datos en github
url <- "https://raw.githubusercontent.com/sofiabocker/proyecto_ca_0303_g08/main/base_datos_alcohol.xlsx"

# descargar el archivo 
binary_data <- getBinaryURL(url)
temp_file <- tempfile(fileext = ".xlsx")
writeBin(binary_data, temp_file)

# leer el archivo en R
base_datos <- read_excel(temp_file)
## New names:
## • `` -> `...32`
## • `` -> `...33`
base_datos <- base_datos [, -32]
base_datos <- base_datos [, -32]
base_datos <- head(base_datos, -25)

# Comprimir las variables de 5 categorías en variables de tres categorías <

base_datos_clean <- base_datos %>%
  clean_names() %>%
  mutate(alcohol_weekdays = fct_collapse(
    alcohol_weekdays,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

#  Asegurarse que los datos se mantengan como characters

base_datos_clean$alcohol_weekdays <- as.character(base_datos_clean$alcohol_weekdays)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(alcohol_weekends = fct_collapse(
    alcohol_weekends,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

base_datos_clean$alcohol_weekends <- as.character(base_datos_clean$alcohol_weekends)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(health_status = fct_collapse(
    health_status ,
    Poor = c("Poor", "Very Poor"),
    Good = c("Very Good", "Good"),
    Fair = "Fair"
  ))

base_datos_clean$health_status <- as.character(base_datos_clean$health_status)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(good_family_relationship = fct_collapse(
    good_family_relationship,
    Poor = c("Poor", "Very Poor"),
    Good = c("Excellent", "Good"),
    Fair = "Fair"
  ))

base_datos_clean$good_family_relationship <- as.character(base_datos_clean$good_family_relationship)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(free_time_after_school = fct_collapse(
    free_time_after_school,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

base_datos_clean$free_time_after_school <- as.character(base_datos_clean$free_time_after_school)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(time_with_friends = fct_collapse(
    time_with_friends,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

base_datos_clean$time_with_friends <- as.character(base_datos_clean$time_with_friends)
# muestra la estructura de los datos
str <- str(base_datos_clean) 
## tibble [649 × 31] (S3: tbl_df/tbl/data.frame)
##  $ school                      : chr [1:649] "Gabriel Pereira" "Gabriel Pereira" "Gabriel Pereira" "Gabriel Pereira" ...
##  $ gender                      : chr [1:649] "Female" "Female" "Female" "Female" ...
##  $ age                         : num [1:649] 18 17 15 15 16 16 16 17 15 15 ...
##  $ housing_type                : chr [1:649] "Urban" "Urban" "Urban" "Urban" ...
##  $ family_size                 : chr [1:649] "Above 3" "Above 3" "Up to 3" "Above 3" ...
##  $ parental_status             : chr [1:649] "Separated" "Living Together" "Living Together" "Living Together" ...
##  $ mother_education            : chr [1:649] "Higher Education" "Primary School" "Primary School" "Higher Education" ...
##  $ father_education            : chr [1:649] "Higher Education" "Primary School" "Primary School" "Lower Secondary School" ...
##  $ mother_work                 : chr [1:649] "Homemaker" "Homemaker" "Homemaker" "Health" ...
##  $ father_work                 : chr [1:649] "Teacher" "other" "other" "Services" ...
##  $ reason_school_choice        : chr [1:649] "Course Preference" "Course Preference" "Other" "Near Home" ...
##  $ legal_responsibility        : chr [1:649] "Mother" "Father" "Mother" "Mother" ...
##  $ commute_time                : chr [1:649] "15 to 30 min" "Up to 15 min" "Up to 15 min" "Up to 15 min" ...
##  $ weekly_study_time           : chr [1:649] "2 to 5h" "2 to 5h" "2 to 5h" "5 to 10h" ...
##  $ extra_educational_support   : chr [1:649] "Yes" "No" "Yes" "No" ...
##  $ parental_educational_support: chr [1:649] "No" "Yes" "No" "Yes" ...
##  $ private_tutoring            : chr [1:649] "No" "No" "No" "No" ...
##  $ extracurricular_activities  : chr [1:649] "No" "No" "No" "Yes" ...
##  $ attended_daycare            : chr [1:649] "Yes" "No" "Yes" "Yes" ...
##  $ desire_graduate_education   : chr [1:649] "Yes" "Yes" "Yes" "Yes" ...
##  $ has_internet                : chr [1:649] "No" "Yes" "Yes" "Yes" ...
##  $ is_dating                   : chr [1:649] "No" "No" "No" "Yes" ...
##  $ good_family_relationship    : chr [1:649] "Good" "Good" "Good" "Fair" ...
##  $ free_time_after_school      : chr [1:649] "Moderate" "Moderate" "Moderate" "Low" ...
##  $ time_with_friends           : chr [1:649] "High" "Moderate" "Low" "Low" ...
##  $ alcohol_weekdays            : chr [1:649] "Low" "Low" "Low" "Low" ...
##  $ alcohol_weekends            : chr [1:649] "Low" "Low" "Moderate" "Low" ...
##  $ health_status               : chr [1:649] "Fair" "Fair" "Fair" "Good" ...
##  $ school_absence              : num [1:649] 4 2 6 0 0 6 0 2 0 0 ...
##  $ grade_1st_semester          : num [1:649] 0 9 12 14 11 12 13 10 15 12 ...
##  $ grade_2nd_semester          : num [1:649] 11 11 13 14 13 12 12 13 16 12 ...
# resumen general de la base de datos

summary(base_datos_clean)
##     school             gender               age        housing_type      
##  Length:649         Length:649         Min.   :15.00   Length:649        
##  Class :character   Class :character   1st Qu.:16.00   Class :character  
##  Mode  :character   Mode  :character   Median :17.00   Mode  :character  
##                                        Mean   :16.74                     
##                                        3rd Qu.:18.00                     
##                                        Max.   :22.00                     
##  family_size        parental_status    mother_education   father_education  
##  Length:649         Length:649         Length:649         Length:649        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  mother_work        father_work        reason_school_choice
##  Length:649         Length:649         Length:649          
##  Class :character   Class :character   Class :character    
##  Mode  :character   Mode  :character   Mode  :character    
##                                                            
##                                                            
##                                                            
##  legal_responsibility commute_time       weekly_study_time 
##  Length:649           Length:649         Length:649        
##  Class :character     Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character  
##                                                            
##                                                            
##                                                            
##  extra_educational_support parental_educational_support private_tutoring  
##  Length:649                Length:649                   Length:649        
##  Class :character          Class :character             Class :character  
##  Mode  :character          Mode  :character             Mode  :character  
##                                                                           
##                                                                           
##                                                                           
##  extracurricular_activities attended_daycare   desire_graduate_education
##  Length:649                 Length:649         Length:649               
##  Class :character           Class :character   Class :character         
##  Mode  :character           Mode  :character   Mode  :character         
##                                                                         
##                                                                         
##                                                                         
##  has_internet        is_dating         good_family_relationship
##  Length:649         Length:649         Length:649              
##  Class :character   Class :character   Class :character        
##  Mode  :character   Mode  :character   Mode  :character        
##                                                                
##                                                                
##                                                                
##  free_time_after_school time_with_friends  alcohol_weekdays  
##  Length:649             Length:649         Length:649        
##  Class :character       Class :character   Class :character  
##  Mode  :character       Mode  :character   Mode  :character  
##                                                              
##                                                              
##                                                              
##  alcohol_weekends   health_status      school_absence   grade_1st_semester
##  Length:649         Length:649         Min.   : 0.000   Min.   : 0.0      
##  Class :character   Class :character   1st Qu.: 0.000   1st Qu.:10.0      
##  Mode  :character   Mode  :character   Median : 2.000   Median :11.0      
##                                        Mean   : 3.659   Mean   :11.4      
##                                        3rd Qu.: 6.000   3rd Qu.:13.0      
##                                        Max.   :32.000   Max.   :19.0      
##  grade_2nd_semester
##  Min.   : 0.00     
##  1st Qu.:10.00     
##  Median :11.00     
##  Mean   :11.57     
##  3rd Qu.:13.00     
##  Max.   :19.00
# explora data

skimr::skim(base_datos_clean) 
Data summary
Name base_datos_clean
Number of rows 649
Number of columns 31
_______________________
Column type frequency:
character 27
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
school 0 1 15 20 0 2 0
gender 0 1 4 6 0 2 0
housing_type 0 1 5 5 0 2 0
family_size 0 1 7 7 0 2 0
parental_status 0 1 9 15 0 2 0
mother_education 0 1 4 22 0 5 0
father_education 0 1 4 22 0 5 0
mother_work 0 1 5 9 0 5 0
father_work 0 1 5 9 0 5 0
reason_school_choice 0 1 5 17 0 4 0
legal_responsibility 0 1 5 6 0 3 0
commute_time 0 1 12 12 0 4 0
weekly_study_time 0 1 7 13 0 4 0
extra_educational_support 0 1 2 3 0 2 0
parental_educational_support 0 1 2 3 0 2 0
private_tutoring 0 1 2 3 0 2 0
extracurricular_activities 0 1 2 3 0 2 0
attended_daycare 0 1 2 3 0 2 0
desire_graduate_education 0 1 2 3 0 2 0
has_internet 0 1 2 3 0 2 0
is_dating 0 1 2 3 0 2 0
good_family_relationship 0 1 4 4 0 3 0
free_time_after_school 0 1 3 8 0 3 0
time_with_friends 0 1 3 8 0 3 0
alcohol_weekdays 0 1 3 8 0 3 0
alcohol_weekends 0 1 3 8 0 3 0
health_status 0 1 4 4 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 16.74 1.22 15 16 17 18 22 ▇▅▅▁▁
school_absence 0 1 3.66 4.64 0 0 2 6 32 ▇▂▁▁▁
grade_1st_semester 0 1 11.40 2.75 0 10 11 13 19 ▁▂▇▇▁
grade_2nd_semester 0 1 11.57 2.91 0 10 11 13 19 ▁▁▇▇▂

Variables cuantitativas

# crear un dataframe con sólo las columnas con valores numéricos
base_datos_num <- base_datos_clean %>% select_if(is.numeric)
base_datos_num
## # A tibble: 649 × 4
##      age school_absence grade_1st_semester grade_2nd_semester
##    <dbl>          <dbl>              <dbl>              <dbl>
##  1    18              4                  0                 11
##  2    17              2                  9                 11
##  3    15              6                 12                 13
##  4    15              0                 14                 14
##  5    16              0                 11                 13
##  6    16              6                 12                 12
##  7    16              0                 13                 12
##  8    17              2                 10                 13
##  9    15              0                 15                 16
## 10    15              0                 12                 12
## # ℹ 639 more rows

Estadísticas más Específicas

# brinda estadísticas más específicas 
estadisticas <- stat.desc(base_datos_num)
estadisticas
##                       age school_absence grade_1st_semester grade_2nd_semester
## nbr.val      6.490000e+02    649.0000000        649.0000000        649.0000000
## nbr.null     0.000000e+00    244.0000000          1.0000000          7.0000000
## nbr.na       0.000000e+00      0.0000000          0.0000000          0.0000000
## min          1.500000e+01      0.0000000          0.0000000          0.0000000
## max          2.200000e+01     32.0000000         19.0000000         19.0000000
## range        7.000000e+00     32.0000000         19.0000000         19.0000000
## sum          1.086700e+04   2375.0000000       7398.0000000       7509.0000000
## median       1.700000e+01      2.0000000         11.0000000         11.0000000
## mean         1.674422e+01      3.6594761         11.3990755         11.5701079
## SE.mean      4.781608e-02      0.1821657          0.1077611          0.1143703
## CI.mean.0.95 9.389318e-02      0.3577064          0.2116031          0.2245812
## var          1.483859e+00     21.5366423          7.5364806          8.4892903
## std.dev      1.218138e+00      4.6407588          2.7452651          2.9136387
## coef.var     7.274973e-02      1.2681484          0.2408323          0.2518247

Histogramas

# crea un histograma para cada columna cuantitativa

lapply(names(base_datos_num), function(col_name) {
  col <- base_datos_num[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_histogram(binwidth = 1, fill = "blue") +
    labs(title = col_name, x = col_name, y = "Frequencia")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Densidad

# crea un gráfico de densidad para cada columna cuantitativa

lapply(names(base_datos_num), function(col_name) {
  col <- base_datos_num[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_density() +
    labs(x = col_name)
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Gráficos de Barra

# crear gráficos de barra para cada columna cuantitativa

lapply(names(base_datos_num), function(col_name) {
  col <- base_datos_num[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_bar(stat = "count", fill = "darkred") +  
    labs(title = col_name, x = col_name, y = "")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Variables cualitativas

# crear un dataframe con sólo las columnas de string
base_datos_str <- base_datos_clean %>% select_if(is.character)
base_datos_str
## # A tibble: 649 × 27
##    school       gender housing_type family_size parental_status mother_education
##    <chr>        <chr>  <chr>        <chr>       <chr>           <chr>           
##  1 Gabriel Per… Female Urban        Above 3     Separated       Higher Education
##  2 Gabriel Per… Female Urban        Above 3     Living Together Primary School  
##  3 Gabriel Per… Female Urban        Up to 3     Living Together Primary School  
##  4 Gabriel Per… Female Urban        Above 3     Living Together Higher Education
##  5 Gabriel Per… Female Urban        Above 3     Living Together High School     
##  6 Gabriel Per… Male   Urban        Up to 3     Living Together Higher Education
##  7 Gabriel Per… Male   Urban        Up to 3     Living Together Lower Secondary…
##  8 Gabriel Per… Female Urban        Above 3     Separated       Higher Education
##  9 Gabriel Per… Male   Urban        Up to 3     Separated       High School     
## 10 Gabriel Per… Male   Urban        Above 3     Living Together High School     
## # ℹ 639 more rows
## # ℹ 21 more variables: father_education <chr>, mother_work <chr>,
## #   father_work <chr>, reason_school_choice <chr>, legal_responsibility <chr>,
## #   commute_time <chr>, weekly_study_time <chr>,
## #   extra_educational_support <chr>, parental_educational_support <chr>,
## #   private_tutoring <chr>, extracurricular_activities <chr>,
## #   attended_daycare <chr>, desire_graduate_education <chr>, …

Gráficos de barra

# crear gráficos de barra para cada columna cualitativa

lapply(names(base_datos_str), function(col_name) {
  col <- base_datos_str[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_bar(stat = "count", fill = "darkred") +  
    labs(title = col_name, x = col_name, y = "")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

Covariaciones

Variables cualitativas y cuantitativas

# Relaciona la nota del primer semestre con la cantidad de alcohol consumida entre semana

# crear el gráfico con fondo blanco y color azul oscuro para las densidades
ggplot(base_datos_clean, aes(x = grade_1st_semester, y = alcohol_weekdays, group = alcohol_weekdays)) + 
  geom_density_ridges(fill = "darkblue", color = "white") +
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90")
  )
## Picking joint bandwidth of 0.747

# Relaciona la nota del primer semestre con la cantidad de alcohol consumida en fin de semana

# crear el gráfico con fondo blanco y color azul oscuro para las densidades
ggplot(base_datos_clean, aes(x = grade_1st_semester, y = alcohol_weekends, group = alcohol_weekends)) + 
  geom_density_ridges(fill = "darkblue", color = "white") +
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90")
  )
## Picking joint bandwidth of 0.822

# Relaciona la nota del segundo semestre con la cantidad de alcohol consumida entre semana

ggplot(base_datos_clean, aes(x = grade_2nd_semester, y = alcohol_weekdays, group = alcohol_weekdays)) + 
  geom_density_ridges(fill = "darkblue", color = "white") +
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90")
)
## Picking joint bandwidth of 0.678

# Relaciona la nota del segundo semestre con la cantidad de alcohol consumida en fin de semana

ggplot(base_datos_clean, aes(x = grade_2nd_semester, y = alcohol_weekends, group = alcohol_weekends)) + 
  geom_density_ridges(fill = "darkblue", color = "white") +
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90")
)
## Picking joint bandwidth of 0.856

# crear un mapa de calor
create_heatmap <- function(col_name) {
  count_data <- base_datos_str %>% count(alcohol_weekdays, !!sym(col_name))
  ggplot(count_data, aes(x = alcohol_weekdays, y = !!sym(col_name))) +
    geom_tile(aes(fill = n), color = "white") +
    scale_fill_gradient(low = "white", high = "darkblue") +
    labs(title = paste("Comparación de alcohol entre semana con", col_name),
         x = "Alcohol entre semana", y = col_name)
}

# aplicar la función a tods las columnas
heatmap_plots <- lapply(names(base_datos_str)[-which(names(base_datos_str) == "alcohol_weekdays")], create_heatmap)

print(heatmap_plots)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

# crear un mapa de calor
create_heatmap <- function(col_name) {
  count_data <- base_datos_str %>% count(alcohol_weekends, !!sym(col_name))
  ggplot(count_data, aes(x = alcohol_weekends, y = !!sym(col_name))) +
    geom_tile(aes(fill = n), color = "white") +
    scale_fill_gradient(low = "white", high = "darkblue") +
    labs(title = paste("Comparación de alcohol en fin de semana con", col_name),
         x = "Alcohol en fin de semana", y = col_name)
}

# aplicar la unción a todas las columnas
heatmap_plots <- lapply(names(base_datos_str)[-which(names(base_datos_str) == "alcohol_weekends")], create_heatmap)

print(heatmap_plots)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

Prototipo de Modelación

Tablas de contingencia Alcohol entre semana

# Crear tablas de contingencia para cada columna cualitativa y la de cantidad de alcohol entre semana

tablas_contingencias_1 <- lapply(base_datos_str, function(col) {
  table(col, base_datos_str$alcohol_weekdays)
})

print(tablas_contingencias_1)
## $school
##                       
## col                    High Low Moderate
##   Gabriel Pereira        22 379       22
##   Mousinho da Silveira   12 193       21
## 
## $gender
##         
## col      High Low Moderate
##   Female    9 363       11
##   Male     25 209       32
## 
## $housing_type
##        
## col     High Low Moderate
##   Rural   10 168       19
##   Urban   24 404       24
## 
## $family_size
##          
## col       High Low Moderate
##   Above 3   23 408       26
##   Up to 3   11 164       17
## 
## $parental_status
##                  
## col               High Low Moderate
##   Living Together   31 501       37
##   Separated          3  71        6
## 
## $mother_education
##                         
## col                      High Low Moderate
##   High School               8 118       13
##   Higher Education          9 155       11
##   Lower Secondary School    7 173        6
##   None                      0   5        1
##   Primary School           10 121       12
## 
## $father_education
##                         
## col                      High Low Moderate
##   High School               5 117        9
##   Higher Education          7 110       11
##   Lower Secondary School   11 188       10
##   None                      0   7        0
##   Primary School           11 150       13
## 
## $mother_work
##            
## col         High Low Moderate
##   Health       0  45        3
##   Homemaker    8 119        8
##   other       14 229       15
##   Services     9 118        9
##   Teacher      3  61        8
## 
## $father_work
##            
## col         High Low Moderate
##   Health       1  20        2
##   Homemaker    0  39        3
##   other       17 329       21
##   Services    14 150       17
##   Teacher      2  34        0
## 
## $reason_school_choice
##                    
## col                 High Low Moderate
##   Course Preference   13 258       14
##   Near Home           10 127       12
##   Other                7  56        9
##   Reputation           4 131        8
## 
## $legal_responsibility
##         
## col      High Low Moderate
##   Father    8 133       12
##   Mother   20 408       27
##   Other     6  31        4
## 
## $commute_time
##               
## col            High Low Moderate
##   15 to 30 min   11 189       13
##   30 min to 1h    4  42        8
##   More than 1h    3  12        1
##   Up to 15 min   16 329       21
## 
## $weekly_study_time
##                
## col             High Low Moderate
##   2 to 5h         14 278       13
##   5 to 10h         2  94        1
##   More than 10h    2  29        4
##   Up to 2h        16 171       25
## 
## $extra_educational_support
##      
## col   High Low Moderate
##   No    30 510       41
##   Yes    4  62        2
## 
## $parental_educational_support
##      
## col   High Low Moderate
##   No    12 215       24
##   Yes   22 357       19
## 
## $private_tutoring
##      
## col   High Low Moderate
##   No    31 539       40
##   Yes    3  33        3
## 
## $extracurricular_activities
##      
## col   High Low Moderate
##   No    14 296       24
##   Yes   20 276       19
## 
## $attended_daycare
##      
## col   High Low Moderate
##   No    10 109        9
##   Yes   24 463       34
## 
## $desire_graduate_education
##      
## col   High Low Moderate
##   No     8  55        6
##   Yes   26 517       37
## 
## $has_internet
##      
## col   High Low Moderate
##   No     5 135       11
##   Yes   29 437       32
## 
## $is_dating
##      
## col   High Low Moderate
##   No    14 364       32
##   Yes   20 208       11
## 
## $good_family_relationship
##       
## col    High Low Moderate
##   Fair    6  92        3
##   Good   24 440       33
##   Poor    4  40        7
## 
## $free_time_after_school
##           
## col        High Low Moderate
##   High       17 203       26
##   Low         7 135       10
##   Moderate   10 234        7
## 
## $time_with_friends
##           
## col        High Low Moderate
##   High       23 202       26
##   Low         4 182        7
##   Moderate    7 188       10
## 
## $alcohol_weekdays
##           
## col        High Low Moderate
##   High       34   0        0
##   Low         0 572        0
##   Moderate    0   0       43
## 
## $alcohol_weekends
##           
## col        High Low Moderate
##   High       26  74       32
##   Low         4 391        2
##   Moderate    4 107        9
## 
## $health_status
##       
## col    High Low Moderate
##   Fair    9 110        5
##   Good   19 310       28
##   Poor    6 152       10

Diagrama de mosaico

# crear una representación gráfica de las tablas de contingencia

lapply(seq_along(tablas_contingencias_1), function(i) {
  mosaicplot(tablas_contingencias_1[[i]],
              color = TRUE,
              xlab = "Alcohol entre semana",
              ylab = names(tablas_contingencias_1[[i]])[2], 
              main = paste("Alcohol entre Semana y", names(base_datos_str)[i][1]))
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
## 
## [[21]]
## NULL
## 
## [[22]]
## NULL
## 
## [[23]]
## NULL
## 
## [[24]]
## NULL
## 
## [[25]]
## NULL
## 
## [[26]]
## NULL
## 
## [[27]]
## NULL

Prueba Chi-Cuadrado

# aplicar la prueba de independencia de chi-cuadrado a cada tabla de contingencia

chi_cuadrado_1 <- lapply(tablas_contingencias_1, chisq.test)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi_cuadrado_1
## $school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.0191, df = 2, p-value = 0.134
## 
## 
## $gender
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 39.436, df = 2, p-value = 2.733e-09
## 
## 
## $housing_type
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.1675, df = 2, p-value = 0.1245
## 
## 
## $family_size
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.3978, df = 2, p-value = 0.3015
## 
## 
## $parental_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.49529, df = 2, p-value = 0.7806
## 
## 
## $mother_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.3106, df = 8, p-value = 0.3168
## 
## 
## $father_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.1102, df = 8, p-value = 0.847
## 
## 
## $mother_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 6.1653, df = 8, p-value = 0.6287
## 
## 
## $father_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 10.683, df = 8, p-value = 0.2203
## 
## 
## $reason_school_choice
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 12.356, df = 6, p-value = 0.05448
## 
## 
## $legal_responsibility
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.6798, df = 4, p-value = 0.04618
## 
## 
## $commute_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 13.687, df = 6, p-value = 0.03333
## 
## 
## $weekly_study_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 23.815, df = 6, p-value = 0.0005648
## 
## 
## $extra_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.696, df = 2, p-value = 0.4283
## 
## 
## $parental_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 5.7747, df = 2, p-value = 0.05572
## 
## 
## $private_tutoring
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.60637, df = 2, p-value = 0.7385
## 
## 
## $extracurricular_activities
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.7848, df = 2, p-value = 0.4097
## 
## 
## $attended_daycare
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.2162, df = 2, p-value = 0.3302
## 
## 
## $desire_graduate_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.0739, df = 2, p-value = 0.0291
## 
## 
## $has_internet
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.5606, df = 2, p-value = 0.4583
## 
## 
## $is_dating
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.4615, df = 2, p-value = 0.00882
## 
## 
## $good_family_relationship
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.4854, df = 4, p-value = 0.1124
## 
## 
## $free_time_after_school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 15.161, df = 4, p-value = 0.004379
## 
## 
## $time_with_friends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 24.017, df = 4, p-value = 7.925e-05
## 
## 
## $alcohol_weekdays
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1298, df = 4, p-value < 2.2e-16
## 
## 
## $alcohol_weekends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 171.75, df = 4, p-value < 2.2e-16
## 
## 
## $health_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.2113, df = 4, p-value = 0.3782

Tablas de contingencia Alcohol fin de semana

# Crear tablas de contingencia para cada columna cualitativa y la de cantidad de alcohol en fines de semana
tablas_contingencias_2 <- lapply(base_datos_str, function(col) {
  table(col, base_datos_str$alcohol_weekends)
})

print(tablas_contingencias_2)
## $school
##                       
## col                    High Low Moderate
##   Gabriel Pereira        87 259       77
##   Mousinho da Silveira   45 138       43
## 
## $gender
##         
## col      High Low Moderate
##   Female   37 275       71
##   Male     95 122       49
## 
## $housing_type
##        
## col     High Low Moderate
##   Rural   39 120       38
##   Urban   93 277       82
## 
## $family_size
##          
## col       High Low Moderate
##   Above 3   85 288       84
##   Up to 3   47 109       36
## 
## $parental_status
##                  
## col               High Low Moderate
##   Living Together  117 341      111
##   Separated         15  56        9
## 
## $mother_education
##                         
## col                      High Low Moderate
##   High School              33  75       31
##   Higher Education         35 110       30
##   Lower Secondary School   28 128       30
##   None                      2   3        1
##   Primary School           34  81       28
## 
## $father_education
##                         
## col                      High Low Moderate
##   High School              30  70       31
##   Higher Education         29  80       19
##   Lower Secondary School   39 133       37
##   None                      0   7        0
##   Primary School           34 107       33
## 
## $mother_work
##            
## col         High Low Moderate
##   Health      10  26       12
##   Homemaker   27  84       24
##   other       46 168       44
##   Services    31  75       30
##   Teacher     18  44       10
## 
## $father_work
##            
## col         High Low Moderate
##   Health       6  17        0
##   Homemaker    7  31        4
##   other       71 221       75
##   Services    45  99       37
##   Teacher      3  29        4
## 
## $reason_school_choice
##                    
## col                 High Low Moderate
##   Course Preference   59 181       45
##   Near Home           32  89       28
##   Other               19  39       14
##   Reputation          22  88       33
## 
## $legal_responsibility
##         
## col      High Low Moderate
##   Father   32  94       27
##   Mother   92 280       83
##   Other     8  23       10
## 
## $commute_time
##               
## col            High Low Moderate
##   15 to 30 min   43 130       40
##   30 min to 1h   12  31       11
##   More than 1h    7   9        0
##   Up to 15 min   70 227       69
## 
## $weekly_study_time
##                
## col             High Low Moderate
##   2 to 5h         52 193       60
##   5 to 10h         5  72       20
##   More than 10h    6  25        4
##   Up to 2h        69 107       36
## 
## $extra_educational_support
##      
## col   High Low Moderate
##   No   123 351      107
##   Yes    9  46       13
## 
## $parental_educational_support
##      
## col   High Low Moderate
##   No    64 145       42
##   Yes   68 252       78
## 
## $private_tutoring
##      
## col   High Low Moderate
##   No   120 375      115
##   Yes   12  22        5
## 
## $extracurricular_activities
##      
## col   High Low Moderate
##   No    61 210       63
##   Yes   71 187       57
## 
## $attended_daycare
##      
## col   High Low Moderate
##   No    33  71       24
##   Yes   99 326       96
## 
## $desire_graduate_education
##      
## col   High Low Moderate
##   No    20  35       14
##   Yes  112 362      106
## 
## $has_internet
##      
## col   High Low Moderate
##   No    27 101       23
##   Yes  105 296       97
## 
## $is_dating
##      
## col   High Low Moderate
##   No    87 248       75
##   Yes   45 149       45
## 
## $good_family_relationship
##       
## col    High Low Moderate
##   Fair   26  55       20
##   Good   92 314       91
##   Poor   14  28        9
## 
## $free_time_after_school
##           
## col        High Low Moderate
##   High       66 126       54
##   Low        26 106       20
##   Moderate   40 165       46
## 
## $time_with_friends
##           
## col        High Low Moderate
##   High       95 106       50
##   Low        14 154       25
##   Moderate   23 137       45
## 
## $alcohol_weekdays
##           
## col        High Low Moderate
##   High       26   4        4
##   Low        74 391      107
##   Moderate   32   2        9
## 
## $alcohol_weekends
##           
## col        High Low Moderate
##   High      132   0        0
##   Low         0 397        0
##   Moderate    0   0      120
## 
## $health_status
##       
## col    High Low Moderate
##   Fair   24  78       22
##   Good   85 207       65
##   Poor   23 112       33
# crear una representación gráfica de las tablas de contingencia

lapply(seq_along(tablas_contingencias_2), function(i) {
  mosaicplot(tablas_contingencias_2[[i]],
              color = TRUE,
              xlab = "Alcohol Fin de Semana",
              ylab = names(tablas_contingencias_2[[i]])[2], 
              main = paste("Alcohol Fin de Semana y", names(base_datos_str)[i][1]))
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
## 
## [[21]]
## NULL
## 
## [[22]]
## NULL
## 
## [[23]]
## NULL
## 
## [[24]]
## NULL
## 
## [[25]]
## NULL
## 
## [[26]]
## NULL
## 
## [[27]]
## NULL

Prueba Chi-Cuadrado

Se escogió el nivel estándar de significancia donde alpha = 5, esto quiere decir que si el p-valor es menor a 5%, entonces, la probabilidad de que esas dos variables sean independientes es muy baja, por lo que se rechaza H_0.

# aplicar la prueba de independencia de chi-cuadrado a cada tabla de contingencia
chi_cuadrado_2 <- lapply(tablas_contingencias_2, chisq.test)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi_cuadrado_2
## $school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.085819, df = 2, p-value = 0.958
## 
## 
## $gender
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 69.654, df = 2, p-value = 7.495e-16
## 
## 
## $housing_type
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.14167, df = 2, p-value = 0.9316
## 
## 
## $family_size
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.171, df = 2, p-value = 0.2049
## 
## 
## $parental_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.8628, df = 2, p-value = 0.1449
## 
## 
## $mother_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 10.255, df = 8, p-value = 0.2476
## 
## 
## $father_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.9856, df = 8, p-value = 0.266
## 
## 
## $mother_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.0431, df = 8, p-value = 0.532
## 
## 
## $father_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 18.679, df = 8, p-value = 0.01668
## 
## 
## $reason_school_choice
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 6.8145, df = 6, p-value = 0.3383
## 
## 
## $legal_responsibility
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.0722, df = 4, p-value = 0.8987
## 
## 
## $commute_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 8.0027, df = 6, p-value = 0.2379
## 
## 
## $weekly_study_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 37.497, df = 6, p-value = 1.409e-06
## 
## 
## $extra_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.4215, df = 2, p-value = 0.298
## 
## 
## $parental_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 6.8137, df = 2, p-value = 0.03314
## 
## 
## $private_tutoring
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.0945, df = 2, p-value = 0.2128
## 
## 
## $extracurricular_activities
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.8354, df = 2, p-value = 0.3994
## 
## 
## $attended_daycare
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.1753, df = 2, p-value = 0.2044
## 
## 
## $desire_graduate_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.3507, df = 2, p-value = 0.1136
## 
## 
## $has_internet
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.7657, df = 2, p-value = 0.2509
## 
## 
## $is_dating
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.53281, df = 2, p-value = 0.7661
## 
## 
## $good_family_relationship
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 5.057, df = 4, p-value = 0.2815
## 
## 
## $free_time_after_school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 18.865, df = 4, p-value = 0.0008356
## 
## 
## $time_with_friends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 94.004, df = 4, p-value < 2.2e-16
## 
## 
## $alcohol_weekdays
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 171.75, df = 4, p-value < 2.2e-16
## 
## 
## $alcohol_weekends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1298, df = 4, p-value < 2.2e-16
## 
## 
## $health_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.4814, df = 4, p-value = 0.1125

V de Cramer

Este modelo permite responder parcialmente o totalmente la pregunta de investigación debido a que brinda información sobre qué factores poseen una mayor influencia en el consumo de alcohol en estudiantes adolescentes. Para poder interpretar los resultados, se puede observar el output que se generó al comparar las columnas alcohol_weekday y alcohol_weekend con el resto de las variables

# asegurarse que los datos sean factores

base_datos_str[] <- lapply(base_datos_str, as.factor)

# generar la V de Cramer para cada columna con alcohol_weekdays

v_cramer_entre_semana <- sapply(base_datos_str, function(col) {
  cramerV(base_datos_str$alcohol_weekdays, col, ci = FALSE, conf = 0.95, type = "perc", R = 1000, histogram = FALSE, digits = 4, bias.correct = FALSE, reportIncomplete = FALSE, verbose = FALSE, tolerance = 1e-16)
})

print(v_cramer_entre_semana)
##                       school.Cramer V                       gender.Cramer V 
##                               0.07869                               0.24650 
##                 housing_type.Cramer V                  family_size.Cramer V 
##                               0.08013                               0.06078 
##              parental_status.Cramer V             mother_education.Cramer V 
##                               0.02763                               0.08469 
##             father_education.Cramer V                  mother_work.Cramer V 
##                               0.05627                               0.06892 
##                  father_work.Cramer V         reason_school_choice.Cramer V 
##                               0.09072                               0.09757 
##         legal_responsibility.Cramer V                 commute_time.Cramer V 
##                               0.08636                               0.10270 
##            weekly_study_time.Cramer V    extra_educational_support.Cramer V 
##                               0.13550                               0.05112 
## parental_educational_support.Cramer V             private_tutoring.Cramer V 
##                               0.09433                               0.03057 
##   extracurricular_activities.Cramer V             attended_daycare.Cramer V 
##                               0.05244                               0.05844 
##    desire_graduate_education.Cramer V                 has_internet.Cramer V 
##                               0.10440                               0.04904 
##                    is_dating.Cramer V     good_family_relationship.Cramer V 
##                               0.12070                               0.07594 
##       free_time_after_school.Cramer V            time_with_friends.Cramer V 
##                               0.10810                               0.13600 
##             alcohol_weekdays.Cramer V             alcohol_weekends.Cramer V 
##                               1.00000                               0.36380 
##                health_status.Cramer V 
##                               0.05696
# generar la V de Cramer para cada columna con alcohol_weekends

v_cramer_fin_semana <- sapply(base_datos_str, function(col) {
  cramerV(base_datos_str$alcohol_weekends, col, ci = FALSE, conf = 0.95, type = "perc", R = 1000, histogram = FALSE, digits = 4, bias.correct = FALSE, reportIncomplete = FALSE, verbose = FALSE, tolerance = 1e-16)
})

print(v_cramer_fin_semana)
##                       school.Cramer V                       gender.Cramer V 
##                               0.01150                               0.32760 
##                 housing_type.Cramer V                  family_size.Cramer V 
##                               0.01477                               0.06990 
##              parental_status.Cramer V             mother_education.Cramer V 
##                               0.07715                               0.08889 
##             father_education.Cramer V                  mother_work.Cramer V 
##                               0.08771                               0.07366 
##                  father_work.Cramer V         reason_school_choice.Cramer V 
##                               0.12000                               0.07246 
##         legal_responsibility.Cramer V                 commute_time.Cramer V 
##                               0.02874                               0.07852 
##            weekly_study_time.Cramer V    extra_educational_support.Cramer V 
##                               0.17000                               0.06108 
## parental_educational_support.Cramer V             private_tutoring.Cramer V 
##                               0.10250                               0.06905 
##   extracurricular_activities.Cramer V             attended_daycare.Cramer V 
##                               0.05318                               0.06995 
##    desire_graduate_education.Cramer V                 has_internet.Cramer V 
##                               0.08188                               0.06528 
##                    is_dating.Cramer V     good_family_relationship.Cramer V 
##                               0.02865                               0.06242 
##       free_time_after_school.Cramer V            time_with_friends.Cramer V 
##                               0.12060                               0.26910 
##             alcohol_weekdays.Cramer V             alcohol_weekends.Cramer V 
##                               0.36380                               1.00000 
##                health_status.Cramer V 
##                               0.07592